/*
 Copyright 2013--Present JMM_PROGNAME
 
 This file is distributed under the terms of the JMM_PROGNAME License.
 
 You should have received a copy of the JMM_PROGNAME License.
 If not, see <JMM_PROGNAME WEBSITE>.
*/
// CREATED    : 9/15/2015
// LAST UPDATE: 9/30/2015

#include "statsxx/machine_learning/neural_network/deep_belief_network/DBN.hpp"

// STL
#include <cmath>    // std::sqrt()
#include <vector>   // std::vector<>

// jScience
#include "jScience/linalg/Matrix.hpp" // Matrix<>
#include "jrandnum.hpp"               // rand_num_uniform_Mersenne_twister()

// jNeuralNet
#include "datasets.hpp" // DataSet, partition_data_set(), Matrix_to_DataSet()
 
// stats++
#include "statsxx/machine_learning/restricted_Boltzmann_machine/RBM.hpp"  // RBM

/*
// << jmm: temporary utility routine, because NEURAL_NET takes the DataSet structure >>
static DataSet Matrix_to_DataSet(
                                 const Matrix<double> &X,          // data
                                 const Matrix<double> &X_out       // data (output)
                                 );
*/

// Fine-tune the DBN in a supervised fashion: deterministically propagate every
// sample through the stacked (pre-trained) RBMs, then train a supervised MLP
// that maps the final RBM hidden layer to the targets.
//
// NOTE(review): `inline` on a member function defined in a .cpp gives the
// definition inline linkage — if the header declaration is not also inline and
// other translation units call finetune(), this will fail to link. Confirm
// against the header before removing/keeping.
inline void neural_network::DBN::finetune(
                                          const int imethod,                // type of method to use for supervised MLP training
                                          // -----
                                          const Matrix<double> &X,          // data
                                          const Matrix<double> &X_out,      // data (output)
                                          const bool x_out_classif          // treat outputs as classification targets
                                          )
{
    
    
    // PASS ALL OF THE DATA THROUGH THE RBMs TO GET THE OUTPUT OF THE FINAL UNSUPERVISED LAYER
    
    // one row per sample, one column per hidden unit of the last RBM
    Matrix<double> X_RBMs(X.size(0), this->RBM.back().get_nhid());
    
    for(auto i = 0; i < X.size(0); ++i)
    {
        // deterministic propagation through all stacked RBMs (no sampling)
        Vector<double> h = this->RBM_propagate(
                                               neural_network::DBN::PropagationType::deterministic,
                                               this->RBM.size(),
                                               X.row(i)
                                               );
        
        for(auto j = 0; j < h.size(); ++j)
        {
            X_RBMs(i,j) = h(j);
        }
    }
    
    // SETUP THE MLP
    // no hidden layers: the MLP maps the last RBM's hidden units directly to the outputs
    // (alternative, previously tried: one hidden layer, e.g. std::vector<int> nhn{50},
    //  passed as nhn.size(), nhn in place of 0, std::vector<int>())
    const bool fully_connect = false;
    const bool recurrent     = false;
    this->MLP.create_MLP(this->RBM.back().get_nhid(), X_out.size(1), 0, std::vector<int>(), fully_connect, recurrent, x_out_classif);
    
    // CONVERT DATA
    
    // convert the data from Matrix<> form to DataSet, and partition it into training, validation, and generalization sets ...
    DataSet data_set = Matrix_to_DataSet(X_RBMs, X_out);
    
    std::vector<DataSet> data_sets = partition_data_set(data_set);
    
    // TRAIN THE MLP
    const int    itranf = 0;     // do not scale the data at all (scaling/transformation deprecated in jNEURALNET)
    const double S_tol  = 1.0;   // use 100% of the data (same reason as above)
    const bool   silent = false; // do not be silent
    
    // imethod 3 is restarted several times, all other methods train once
    // (presumably 3 is a stochastic method that benefits from restarts — TODO confirm)
    const int n_passes = (imethod == 3) ? 10 : 1;
    
    for(int ipass = 0; ipass < n_passes; ++ipass)
    {
        this->MLP.train(data_sets[0], data_sets[1], data_sets[2], imethod, itranf, S_tol, silent);
    }
}

/*
static DataSet Matrix_to_DataSet(
                                 const Matrix<double> &X,          // data
                                 const Matrix<double> &X_out       // data (output)
                                 )
{
    DataSet data_set(X.size(0), 1, X.size(1), X_out.size(1));
    
    for(auto i = 0; i < X.size(0); ++i)
    {
        for(auto j = 0; j < X.size(1); ++j)
        {
            data_set.pt[i].in[0][j] = X(i,j);
        }
        
        for(auto j = 0; j < X_out.size(1); ++j)
        {
            data_set.pt[i].out[j] = X_out(i,j);
        }
    }
    
    return data_set;
}
*/
